### keyword classification
library(data.table)
# Build the sorted vocabulary of distinct keywords found in `legis$keywords`.
# Every entry is split on commas, each non-alphanumeric character is turned
# into a space, and runs of whitespace are squished away.
keywords <- stringr::str_split(legis$keywords, ",") |>
  unlist()
keywords <- gsub("[^A-Za-z0-9]", " ", keywords)

# Strip the first "c  " (letter c + two spaces) from fragments that start with
# "c" -- presumably the residue of a deparsed `c("...")` vector; confirm against
# how `legis$keywords` is stored.
# which() is used instead of a raw logical mask: a missing keyword entry yields
# an NA in the mask, and R refuses NA subscripts in a multi-element
# subassignment ("NAs are not allowed in subscripted assignments").
starts_with_c <- which(substr(keywords, 1, 1) == "c")
keywords[starts_with_c] <- stringr::str_remove(keywords[starts_with_c], "c  ")

# sort() drops NA values by default, so the vocabulary ends up NA-free.
keywords <- keywords |>
  stringr::str_squish() |>
  unique() |>
  sort()

# Blank out entries that carry no topical information: the two raw EuroVoc URI
# fragments and any missing value. Everything else is kept unchanged.
# The NA test must be is.na(): a literal `NA` condition is never TRUE, so the
# original `NA ~ " "` arm could not match anything.
keywords <- dplyr::case_when(
  is.na(keywords) ~ " ",
  keywords %in% c("http eurovoc europa eu 4022",
                  "http eurovoc europa eu 5892") ~ " ",
  TRUE ~ keywords
)

# Label every vocabulary entry as "right", "left" or "neutral".
# The two lookup vectors below hold the hand-coded classification; any keyword
# that appears in neither is labelled "neutral". Matching is exact and
# case-sensitive. The two sets are disjoint, so the order of the assignments
# does not affect the result.
left_right <- local({
  right_terms <- c(
    "Agricultural and forestry tractors",
    "Agricultural research",
    "Agricultural statistics",
    "Agricultural structural funds",
    "Agricultural structures",
    "Agriculture",
    "Aid for stricken regions",
    "Air transport",
    "Animal feedingstuffs",
    "Animal health and zootechnics",
    "Anti dumping measures",
    "Application of the Common Customs Tariff",
    "Application of the rules of competition to public undertakings",
    "Asylum policy",
    "Banks",
    "Basic customs instruments",
    "Beef and veal",
    "bonded warehouses",
    "Business activities",
    "Carry over of duties",
    "Catch quotas and management of stocks",
    "Cereals",
    "Chemicals",
    "Commercial policy",
    "Common agricultural policy mechanisms",
    "Common Customs Tariff",
    "Common customs territory",
    "Common export arrangements",
    "Coordination of structural instruments",
    "Corporation tax",
    "Cosmetics",
    "Cotton",
    "Countervailing duties",
    "Customs cooperation",
    "Definition of declarant",
    "Dried fodder",
    "Economic and social cohesion fund",
    "Eggs and poultry",
    "Elimination of barriers to trade",
    "Elimination of double taxation",
    "Elimination of internal border controls",
    "European Agricultural Fund for Rural Development",
    "European Agricultural Guarantee Fund",
    "European Agricultural Guidance and Guarantee Fund EAGGF",
    "Excise duties",
    "Export guarantees and financing",
    "Export procedures",
    "Fertilisers",
    "Flax and hemp",
    "Flowers and live plants",
    "Foodstuffs",
    "For the recovery of claims in customs or agriculture",
    "Forests and forestry",
    "Free movement of capital",
    "Free movement of goods",
    "Free movement of persons",
    "Freedom of movement for workers",
    "Freedom of movement of people",
    "Fresh fruit and vegetables",
    "General customs rules",
    "Hops",
    "Indirect taxation",
    "Individual tax exemptions",
    "Insurance",
    "Intellectual property law",
    "Internal market approximation of laws",
    "Internal market policy relating to undertakings",
    "International commodity agreements",
    "International customs cooperation",
    "Intra Community dumping practices",
    "Iron and steel industry",
    "Isoglucose",
    "Leisure services",
    "Market access",
    "Market monitoring",
    "Market operation",
    "Market organisation",
    "Milk products",
    "Motor vehicles",
    "Movement of goods",
    "Multilateral customs cooperation",
    "Oils and fats",
    "Origin of goods",
    "Other agricultural products",
    "Other arrangements concerning movement of goods",
    "Other commercial policy measures",
    "Other economic customs arrangements",
    "Other industrial sectors",
    "Other measures relating to iron and steel",
    "Other measures relating to oil or gas",
    "Other reliefs from duty",
    "Outward processing and harmonisation of standard trade",
    "Peas and beans",
    "Pigmeat",
    "Plant health",
    "Police and judicial cooperation in criminal and customs matters",
    "Police cooperation",
    "Post clearance collection of duties",
    "Processing and marketing of agricultural products",
    "Products processed from fruit and vegetables",
    "Products subject to market organisation",
    "Promotion of the coal industry",
    "Protection of economic interests",
    "Raw tobacco",
    "Real property",
    "Recovery or remission of duties",
    "Reliefs from duty",
    "Research and technological development",
    "Returned goods",
    "Rice",
    "Rules of origin defined in the context of preferential arrangements",
    "Seeds",
    "Seeds and seedlings",
    "Self employed activities",
    "Sheepmeat and goatmeat",
    "Shipbuilding",
    "Shipping",
    "Silkworms",
    "Stock exchanges and other securities markets",
    "Sugar",
    "Tariff ceilings",
    "Tariff classification",
    "Tariff derogations",
    "Tariff quotas",
    "Tariff suspensions",
    "Textiles",
    "Wine",
    "Combined transport",
    "Common fisheries policy",
    "Community transit",
    "Common import arrangements",
    "Company law",
    "Competition principles",
    "Competition rules",
    "Crossing external borders",
    "Customs debt inception",
    "Customs tariffs",
    "Electrical material",
    "Electricity",
    "European Community s integrated tariff TARIC",
    "exemptions and negative clearances",
    "Fixing of compensatory amounts",
    "Free zones",
    "In the application of customs or agricultural rules",
    "Inland transport",
    "Judicial cooperation in civil matters",
    "Judicial cooperation in criminal matters",
    "Multilateral transport cooperation",
    "Other production and processing activities",
    "Other sectors for approximation of laws",
    "Power stations and joint undertakings",
    "Specific customs rules",
    "Transport",
    "Transport infrastructure",
    "Transport prices and terms",
    "User tariffs",
    "Value for customs purposes"
  )

  left_terms <- c(
    "Aid to developing countries",
    "Aid to Latin American and Asian countries",
    "Anti discrimination",
    "Application to migrant workers",
    "Approximation of certain social provisions",
    "Approximation of laws and health measures",
    "Arrangements covering more than one market organisation",
    "Conservation of resources",
    "Conservation of wild fauna and flora",
    "Consumer information",
    "Consumers",
    "consumers and health protection",
    "Culture",
    "Development policy",
    "Dissemination of information",
    "Dominant positions",
    "education and representation",
    "Employment and unemployment",
    "Employment incentives",
    "Environment",
    "environment and natural resources",
    "European citizenship",
    "European Social Fund ESF",
    "Financial and economic Aid",
    "Financial support",
    "Food aid",
    "Gender equality",
    "General social provisions",
    "Health protection",
    "Immigration and the right of nationals of third countries",
    "income and working hours",
    "Income tax",
    "Monitoring of atmospheric pollution",
    "Multilateral cooperation for protection of the environment",
    "Nuclear safety and radioactive waste",
    "Pollution and nuisances",
    "Prevention of noise pollution",
    "Prevention of tax evasion and avoidance",
    "Principles of social security",
    "Protection of animals",
    "Protection of health and safety",
    "Protection of workers",
    "Reintroduction of customs duties",
    "Research sectors",
    "Right to asylum application of international rules on asylum within the European Union",
    "Safety at work",
    "Social and structural measures",
    "Social conditions",
    "Social policy",
    "Social security",
    "Specific aid actions",
    "State aids",
    "State aids and other subsidies",
    "State intervention",
    "Taxation",
    "Taxes on capital and transactions in securities",
    "the environment and natural resources",
    "Turnover tax VAT",
    "Wages",
    "Waste management and clean technology",
    "Water protection and management",
    "wild fauna and flora and natural resources",
    "Working conditions",
    "Dangerous substances",
    "Education and training",
    "exigibility and extinction",
    "Fuel supplies",
    "Industrial policy general",
    "Industrial policy sectoral operations",
    "Industrial relations",
    "industrial risk and biotechnology",
    "Other public contracts",
    "Other taxes",
    "Public contracts",
    "Public services contracts",
    "Public supply contracts",
    "Public works contracts",
    "Rational utilisation and conservation of energy",
    "Right of refugees and displaced persons not eligible for asylum",
    "Safety at sea"
  )

  # Start from the fallback label and overwrite the classified entries.
  # %in% never returns NA, so a missing keyword simply stays "neutral".
  labels <- rep("neutral", length(keywords))
  labels[keywords %in% left_terms] <- "left"
  labels[keywords %in% right_terms] <- "right"
  labels
})
# Pair every vocabulary entry with its orientation label; `words` is the
# dictionary later handed to keywords_coding().
words <- data.table(keywords, left_right)

# One character vector of keywords per orientation, for the printed table.
left <- words$keywords[words$left_right == "left"]
right <- words$keywords[words$left_right == "right"]
neutral <- words$keywords[words$left_right == "neutral"]

# The three vectors become side-by-side columns of one data frame, so they must
# have the same length. Pad the shorter ones with blank cells up to the longest
# column instead of relying on hard-coded counts, which break as soon as the
# vocabulary or the classification changes.
n_table_rows <- max(length(left), length(neutral), length(right))
left <- c(left, rep(" ", n_table_rows - length(left)))
right <- c(right, rep(" ", n_table_rows - length(right)))
neutral <- c(neutral, rep(" ", n_table_rows - length(neutral)))

library(xtable)
# Lay the three keyword columns side by side and export them as a LaTeX
# longtable to tables/keywords.tex.
df <- data.frame(left = left, neutral = neutral, right = right)

# `align` needs one entry more than the number of columns: xtable reserves the
# first entry for the row-name column even though row names are not printed.
# The R{...}/L{...} column types are not standard LaTeX -- presumably defined
# in the document preamble; confirm.
print(
  xtable(
    df,
    align = c("R{5cm}", "R{5cm}", "L{5cm}", " p{5cm}"),
    caption = "Keyword classification",
    label = "tab:keywords"
  ),
  file = "tables/keywords.tex",
  tabular.environment = "longtable",
  floating = FALSE,
  include.rownames = FALSE,
  caption.placement = "top",
  size = "tiny"
)

#' Score a keyword string on a left-right scale
#'
#' Cleans the comma-separated keywords in `x` the same way the vocabulary at
#' the top of this script is cleaned, looks them up in `words`, and returns
#' the smoothed log ratio of right-coded to left-coded hits.
#'
#' @param x Character scalar (or vector) holding comma-separated keywords.
#' @param words Table with a character column `keywords` and a column
#'   `left_right` whose values include "left" and "right".
#' @param exact If `FALSE` (default) a vocabulary entry counts as hit when it
#'   *contains* one of the cleaned keywords (case-sensitive substring match,
#'   the script's long-standing behaviour). If `TRUE` only identical entries
#'   count.
#' @return A single number, `log(n_right + 0.5) - log(n_left + 0.5)`;
#'   0 when nothing matches.
keywords_coding <- function(x, words, exact = FALSE) {
  # Split on commas and normalise: non-alphanumerics become spaces, the first
  # "c  " is removed from fragments starting with "c", whitespace is squished.
  # Base R equivalents are used so the function has no package dependencies.
  fragments <- unlist(strsplit(as.character(x), ",", fixed = TRUE))
  fragments <- gsub("[^A-Za-z0-9]", " ", fragments)
  lead_c <- which(substr(fragments, 1, 1) == "c")
  fragments[lead_c] <- sub("c  ", "", fragments[lead_c], fixed = TRUE)
  fragments <- trimws(gsub(" +", " ", fragments))

  # Drop missing and empty fragments. An empty pattern matches every string,
  # so a stray comma or an empty keyword field would otherwise count the whole
  # vocabulary as hits.
  fragments <- unique(fragments[!is.na(fragments) & nzchar(fragments)])

  vocabulary <- words$keywords
  if (exact) {
    hit <- vocabulary %in% fragments
  } else {
    # NOTE(review): substring matching lets a short keyword such as
    # "Transport" also hit "Transport infrastructure"; pass exact = TRUE if
    # that is not wanted. The fragments contain only letters, digits and
    # single spaces, so a fixed match equals the former regex match.
    hit <- rep(FALSE, length(vocabulary))
    for (fragment in fragments) {
      hit <- hit | grepl(fragment, vocabulary, fixed = TRUE)
    }
  }

  labels <- words$left_right[hit]
  n_left <- sum(labels == "left")
  n_right <- sum(labels == "right")

  # Adding 0.5 to both counts keeps the log ratio finite when a side is empty.
  log(n_right + 0.5) - log(n_left + 0.5)
}

 # Add one left-right score per row of `legis`. Inside the data.table call,
 # `keywords` refers to the column of `legis` (columns are looked up before
 # global variables), while `words` is the dictionary built above.
 # vapply() guarantees a plain numeric vector even for zero rows, where
 # sapply() would return an empty list.
 legis[, keywords_score := vapply(keywords, keywords_coding, numeric(1),
                                  words = words, USE.NAMES = FALSE)]

